Environment

load R environment

library(tidyverse)
library(Seurat)

Load IPF Cell Atlas data

Spapros expects scanpy-format (AnnData) data with raw counts. Load the data using the established R workflow, then convert.

### Read in GSE136831

# Directory holding the downloaded GSE136831 (IPF Cell Atlas) files.
original.dir <- "~/vcalab-files/datasets/GSE136831_IPF_Cell_Atlas/"
# Per-cell metadata table (tab-separated, gzip-compressed).
metadata <- original.dir %>%
  paste0("GSE136831_AllCells.Samples.CellType.MetadataTable.txt.gz") %>%
  read_tsv()
Rows: 312928 Columns: 9── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (7): CellBarcode_Identity, CellType_Category, Manuscript_Identity, Subclass_Cell_Identity, Disease_Identi...
dbl (2): nUMI, nGene
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the tibble to a plain data.frame and key its rows by cell barcode;
# the barcode column itself is kept.
metadata <- as.data.frame(metadata)
rownames(metadata) <- metadata[["CellBarcode_Identity"]]

# Gene identifier table, later used to label the rows of the count matrix.
features <- original.dir %>%
  paste0("GSE136831_AllCells.GeneIDs.txt.gz") %>%
  read_tsv()
Rows: 45947 Columns: 2── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): Ensembl_GeneID, HGNC_EnsemblAlt_GeneID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Cell barcodes file has no header row, so readr names the single column X1.
cells <- read_tsv(
  paste0(original.dir, "GSE136831_AllCells.cellBarcodes.txt.gz"),
  col_names = FALSE # spelled out: `F` is an ordinary variable that can be reassigned
)
Rows: 312928 Columns: 1── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the sparse raw-count matrix (MatrixMarket format).
counts <- Matrix::readMM(paste0(original.dir, "GSE136831_RawCounts_Sparse.mtx.gz"))
# Label rows with gene identifiers and columns with cell barcodes.
dimnames(counts) <- list(features$HGNC_EnsemblAlt_GeneID, cells$X1)
# Build the Seurat object from the counts and the per-cell metadata.
ipfatlas.cells <- CreateSeuratObject(counts = counts, meta.data = metadata)
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
# The loaded intermediates have been passed into the Seurat object;
# remove them and run the garbage collector to release memory.
rm(list = c("counts", "cells", "features", "metadata"))
gc()
             used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    4287022  229.0    6682443   356.9    6682443   356.9
Vcells 1053056767 8034.2 3672764273 28021.0 4194266324 31999.8

Standard filters

# Per-cell percentage of counts from features whose names match "^MT-".
ipfatlas.cells[["percent.mt"]] <- PercentageFeatureSet(
  ipfatlas.cells,
  pattern = "^MT-"
)
# Violin plots of the three QC metrics, drawn without individual points.
VlnPlot(
  ipfatlas.cells,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  ncol = 3,
  pt.size = 0
)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

# Keep cells with more than 200 and fewer than 5000 detected features
# and a percent.mt below 10.
ipfatlas.cells <- subset(
  ipfatlas.cells,
  subset = nFeature_RNA > 200 &
    nFeature_RNA < 5000 &
    percent.mt < 10
)

Convert to scanpy compatible format (anndata)

Remarkably, this is not a solved problem, but sceasy has functions for it. We first have to back-convert the Seurat v5 object to a v3-like assay.

# Coerce the RNA assay to the "Assay" class before handing the object to sceasy.
ipfatlas.cells[["RNA"]] <- as(
  object = ipfatlas.cells[["RNA"]],
  Class = "Assay"
)
Warning: No layers found matching search pattern provided
Warning: No layers found matching search pattern provided
Warning: No layers found matching search pattern provided
Warning: Layer ‘data’ is empty
Warning: No layers found matching search pattern provided
Warning: No layers found matching search pattern provided
Warning: Layer ‘scale.data’ is empty
Warning: Assay RNA changing from Assay5 to Assay
# Write the Seurat object out as an AnnData file for the Python side.
sceasy::convertFormat(
  ipfatlas.cells,
  from = "seurat",
  to = "anndata",
  outFile = "ipfatlas.adata"
)
Warning: Dropping single category variables:orig.ident
AnnData object with n_obs × n_vars = 253424 × 45947
    obs: 'nCount_RNA', 'nFeature_RNA', 'CellBarcode_Identity', 'nUMI', 'nGene', 'CellType_Category', 'Manuscript_Identity', 'Subclass_Cell_Identity', 'Disease_Identity', 'Subject_Identity', 'Library_Identity', 'percent.mt'
    var: 'name'

load python environment

Reticulate behaves unexpectedly when it has already been instantiated by other packages, so restart R first.

# Restart the R session so reticulate starts from a clean state.
# NOTE(review): .rs.restartR() looks like an RStudio-internal helper — confirm
# this step is only ever run inside RStudio.
.rs.restartR()
NULL

Using the Spapros workflow documented by the Theis lab, which is Python/scanpy based.

# Open reticulate's Python REPL; the statements that follow are Python.
reticulate::repl_python()
import pandas as pd
import scanpy as sc
import spapros as sp
# Set scanpy's verbosity to 0, then print the scanpy header with package versions.
sc.settings.verbosity = 0
sc.logging.print_header()
scanpy==1.9.6 anndata==0.10.3 umap==0.5.5 numpy==1.26.3 scipy==1.11.4 pandas==1.5.3 scikit-learn==1.1.3 statsmodels==0.14.1 igraph==0.9.11 pynndescent==0.5.11
# Report the installed spapros version alongside the scanpy header above.
print(f"spapros=={sp.__version__}")
spapros==0.1.4

Access the AnnData object on the Python side. To avoid name conversions throughout the Python code, just call it “adata.” Porting objects directly from the R to the Python environment behaved unpredictably, so store the data as h5ad and read it back in.

# Load the AnnData file written from the R session, then echo its summary.
adata = sc.read_h5ad("ipfatlas.adata")
adata
AnnData object with n_obs × n_vars = 253424 × 45947
    obs: 'nCount_RNA', 'nFeature_RNA', 'CellBarcode_Identity', 'nUMI', 'nGene', 'CellType_Category', 'Manuscript_Identity', 'Subclass_Cell_Identity', 'Disease_Identity', 'Subject_Identity', 'Library_Identity', 'percent.mt'
    var: 'name'

SPAPROS probe selection

Preprocess data with scanpy

Filtering has already been done on the Seurat side.

# Normalise each cell to 10,000 total counts (matches the default Seurat
# scale factor), then log1p-transform.
sc.pp.normalize_total(adata, target_sum=10_000)
sc.pp.log1p(adata)
# Flag the top 1000 highly variable genes using the "cell_ranger" flavor.
sc.pp.highly_variable_genes(adata, flavor="cell_ranger", n_top_genes=1000)
# PCA with the lobpcg solver (arpack caused issues here).
# NOTE(review): the captured warnings report that lobpcg did not reach the
# requested tolerance — confirm the resulting components are acceptable.
sc.pp.pca(adata, svd_solver="lobpcg")
/home/vincent/.local/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/_svds.py:487: UserWarning: Exited at iteration 20 with accuracies 
[4.79425120e-08 4.33860979e-07 4.25535729e-08 1.19035951e-07
 4.12742959e-08 1.13385877e-07 7.08756943e-07 1.37830810e-06
 1.51369674e-07 5.47363570e-07 2.09337811e-06 3.11242987e-07
 1.04559607e-06 8.92120768e-08 2.06874692e-07 1.38487152e-07
 1.12949811e-06 7.70405333e-07 1.15730129e-06 2.44537505e-06
 4.45034612e-07 1.09552891e-06 2.38279297e-07 1.70460495e-06
 4.95124826e-07 4.02370540e-07 2.41765186e-06 6.03523887e-07
 2.19781845e-06 1.21796480e-06 5.84655170e-07 4.03826669e-06
 1.93481355e-06 2.18713447e-06 2.79229665e-06 6.98544531e-06
 1.46472059e-06 4.01444386e-06 3.26535748e-06 1.65977582e-05
 1.99434152e-05 1.90039855e-04 2.84961516e-04 8.43750002e-04
 5.89064658e-03 9.32999730e-03 8.68964326e-03 4.86836385e-02
 1.07320979e-01 1.69766674e+01]
not reaching the requested tolerance 1.4901161193847656e-05.
Use iteration 21 instead with accuracy 
0.3431597728816552.

  _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
/home/vincent/.local/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/_svds.py:487: UserWarning: Exited postprocessing with accuracies 
[4.81829694e-08 4.33156171e-07 4.15085166e-08 1.18443928e-07
 4.10329817e-08 1.13370684e-07 7.08724601e-07 1.37826510e-06
 1.51156757e-07 5.47290646e-07 2.09345190e-06 3.11020503e-07
 1.04560254e-06 8.90149173e-08 2.06879686e-07 1.38642587e-07
 1.12966102e-06 7.70430839e-07 1.15727319e-06 2.44537198e-06
 4.45026163e-07 1.09566625e-06 2.38218842e-07 1.70461422e-06
 4.95173291e-07 4.02321463e-07 2.41768558e-06 6.03567884e-07
 2.19774360e-06 1.21802253e-06 5.84547692e-07 4.03831660e-06
 1.93482576e-06 2.18726418e-06 2.79223102e-06 6.98547527e-06
 1.46476658e-06 4.01439911e-06 3.26532234e-06 1.65977890e-05
 1.99434840e-05 1.90039834e-04 2.84961507e-04 8.43750076e-04
 5.89064660e-03 9.32999723e-03 8.68964326e-03 4.86836385e-02
 1.07320979e-01 1.69766674e+01]
not reaching the requested tolerance 1.4901161193847656e-05.
  _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
# Neighbor graph with 10 neighbors on the first 30 PCs, then a UMAP embedding.
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=30)
sc.tl.umap(adata)
# UMAP colored by the CellType_Category annotation.
sc.pl.umap(adata, color=["CellType_Category"])
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
# UMAP colored by the Disease_Identity annotation.
sc.pl.umap(adata, color=["Disease_Identity"])
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

SPAPROS basic: no constraints.

Asked for 200 genes, because one imagines that various stakeholders will add another 100 for IPF subsets, and probably asthma stakeholders will add another 100 bringing total to 400. Note that this dataset includes normal lung and COPD lung.

# Set up a 200-gene probe-set selection keyed on the Manuscript_Identity
# cell-type labels; nothing is saved to disk (save_dir=None).
selector = sp.se.ProbesetSelector(
    adata,
    n=200,
    celltype_key="Manuscript_Identity",
    verbosity=0,
    save_dir=None,
)
Note: The following celltypes' test set sizes for forest training are below min_test_n (=20):
     Ionocyte : 6
     PNEC     : 12
The genes selected for those cell types potentially don't generalize well. Find the genes for each of those cell types in self.genes_of_primary_trees after running self.select_probeset().
# Run the probe-set selection configured above.
selector.select_probeset()
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py:752: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.
  warnings.warn(

Export probes for simulations

selector.probeset[selector.probeset.selection].to_csv('SPAPROS-basic200.csv')

Quick summary of selected probes

What is the distribution of chosen probes among methods?

selector.plot_gene_overlap()
/home/vincent/.local/lib/python3.9/site-packages/spapros/plotting/plot.py:1241: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()

I don’t understand what the 4th column is: are these 33 genes that have no attribution for the method of selection?

# list probes
selector.probeset.index[selector.probeset.selection]
Index(['TPSB2', 'CCL21', 'GRP', 'EMP2', 'HYDIN', 'MIR205HG', 'TPM2', 'ZNF385D',
       'FCN3', 'S100B',
       ...
       'COL6A2', 'CXCL5', 'TIMP3', 'AGBL4', 'ADAM19', 'BATF', 'LIMCH1',
       'SKAP1', 'CTSW', 'ABLIM1'],
      dtype='object', length=200)

Table of characteristics for selected probes

probe_candidates = selector.probeset[selector.probeset.selection]
probe_candidates
        gene_nr  selection  ...  required_marker  required_list_marker
TPSB2         1       True  ...             True                 False
CCL21         2       True  ...             True                 False
GRP           3       True  ...             True                 False
EMP2          4       True  ...             True                 False
HYDIN         5       True  ...             True                 False
...         ...        ...  ...              ...                   ...
BATF        196       True  ...            False                 False
LIMCH1      197       True  ...            False                 False
SKAP1       198       True  ...            False                 False
CTSW        199       True  ...            False                 False
ABLIM1      200       True  ...            False                 False

[200 rows x 17 columns]

Dotplots of probeset split by selection method

Cluster markers (DE 1vs all)

probes = probe_candidates[(probe_candidates['celltypes_DE_1vsall']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:40], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[40::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

Nuanced cluster markers (celltypes_DE_specific)

probes = probe_candidates[(probe_candidates['celltypes_DE_specific']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:25], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[25::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

PCA based (pca_selected)

probes = probe_candidates[(probe_candidates['pca_selected'])]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:33], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[33:66], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

sc.pl.dotplot(adata, probes.index[66::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

SPAPROS with expression penalties

Penalties are 0 to 1, where 0 is bad and 1 is best (i.e., no penalty). Behavior of penalties seems to depend on the source. If identified by the package, it’s a multiplier. If the source is from a manually curated list, all genes with any penalty are removed.

We were told by 10x that SCGB1A1 and SFTPC had too high abundance to design probes. COL1A1 is the highest single detected gene in trial runs; I am not aware of any codeword budget issues with COL1A1.

sc.pl.violin(adata, ['SCGB1A1', 'SFTPC', 'COL1A1'])

sc.pl.violin(adata, ['GDF15', 'CLDN4', 'KRT8'])

If we wanted to be fairly aggressive, a threshold range of roughly 1 to 6 would get us genes that are expressed like COL1A1 or even more highly. Some high-performance genes like KRT8 wouldn’t necessarily be selected.

# Set thresholds
# lower_th / upper_th are passed as x_min / x_max to the penalty kernels below;
# FACTOR scales the `var` argument of both kernels.
lower_th = 1.0
upper_th = 6
FACTOR = 0.1

# Calculate quantiles
# The penalty step further down reads adata.var['quantile_0.99'] and
# adata.var['quantile_0.9 expr > 0'], which these two calls are expected to add.
sp.ut.get_expression_quantile(adata, q=0.99, normalise=False, log1p=False, zeros_to_nan=False)
sp.ut.get_expression_quantile(adata, q=0.9, normalise=False, log1p=False, zeros_to_nan=True)

# Get penalty functions for given factor
# The container has to exist before it is indexed; without this line every
# penalty_fcts[...] statement raises NameError and no penalty columns are
# ever written to adata.var.
penalty_fcts = {}
penalty_fcts[f"lower_{FACTOR}"] = sp.ut.plateau_penalty_kernel(var=0.1 * FACTOR, x_min=lower_th, x_max=None)
Traceback (most recent call last):
  File "<string>", line 1, in <module>
NameError: name 'penalty_fcts' is not defined
penalty_fcts[f"upper_{FACTOR}"] = sp.ut.plateau_penalty_kernel(var=0.5*FACTOR, x_min=None, x_max=upper_th)
Traceback (most recent call last):
  File "<string>", line 1, in <module>
NameError: name 'penalty_fcts' is not defined
# Calculate each gene's penalty value
adata.var[f"expr_penalty_lower_{FACTOR}"] = penalty_fcts[f"lower_{FACTOR}"](adata.var['quantile_0.9 expr > 0'])
Traceback (most recent call last):
  File "<string>", line 1, in <module>
NameError: name 'penalty_fcts' is not defined
adata.var[f"expr_penalty_upper_{FACTOR}"] = penalty_fcts[f"upper_{FACTOR}"](adata.var['quantile_0.99'])
Traceback (most recent call last):
  File "<string>", line 1, in <module>
NameError: name 'penalty_fcts' is not defined
# PCA and DE selections with penalties
penalty_keys = [f"expr_penalty_lower_{FACTOR}",f"expr_penalty_upper_{FACTOR}"]

adata.var["expr_penalty_lower"] = adata.var[f"expr_penalty_lower_{FACTOR}"]
Traceback (most recent call last):
  File "/home/vincent/.local/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3802, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 165, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 5745, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 5753, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'expr_penalty_lower_0.1'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/vincent/.local/lib/python3.9/site-packages/pandas/core/frame.py", line 3807, in __getitem__
    indexer = self.columns.get_loc(key)
  File "/home/vincent/.local/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3804, in get_loc
    raise KeyError(key) from err
KeyError: 'expr_penalty_lower_0.1'
adata.var["expr_penalty_upper"] = adata.var[f"expr_penalty_upper_{FACTOR}"]
Traceback (most recent call last):
  File "/home/vincent/.local/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3802, in get_loc
    return self._engine.get_loc(casted_key)
  File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 165, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 5745, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 5753, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'expr_penalty_upper_0.1'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/vincent/.local/lib/python3.9/site-packages/pandas/core/frame.py", line 3807, in __getitem__
    indexer = self.columns.get_loc(key)
  File "/home/vincent/.local/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3804, in get_loc
    raise KeyError(key) from err
KeyError: 'expr_penalty_upper_0.1'

Run the selection. In this run we have not seeded any specific number of genes (n=None); I am curious what it comes up with.

# create an instance of the ProbesetSelector class
# This selector is configured with expression-penalty keys for the PCA, DE and
# marker steps, and is run below via select_probeset().
selector_highexpression = sp.se.ProbesetSelector(
    adata,
    n=None,  # no target number of genes is requested for this run
    celltype_key="Manuscript_Identity",
    verbosity=1,
    save_dir=None,
    # presumably names of adata.var columns holding per-gene penalty values —
    # confirm against the spapros documentation.
    # NOTE(review): in the rendered run above, the assignments to
    # adata.var["expr_penalty_lower"] / ["expr_penalty_upper"] failed with
    # KeyError, so verify these columns actually exist before trusting the result.
    pca_penalties=["expr_penalty_lower", "expr_penalty_upper"],
    DE_penalties=["expr_penalty_lower", "expr_penalty_upper"],
    m_penalties_adata_celltypes=["expr_penalty_lower", "expr_penalty_upper"],
    # only the upper penalty is passed for list-derived markers
    m_penalties_list_celltypes=["expr_penalty_upper"],
)
Note: The following celltypes' test set sizes for forest training are below min_test_n (=20):
     Ionocyte : 6
     PNEC     : 12
The genes selected for those cell types potentially don't generalize well. Find the genes for each of those cell types in self.genes_of_primary_trees after running self.select_probeset().
selector_highexpression.select_probeset()
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
SPAPROS PROBESET SELECTION:                                                     
Select pca genes.......................................... ━━━━━━━  100% 0:00:07
Train baseline forest based on DE genes................... ━━━━━━━   4/4 1:16:20
  Select DE genes......................................... ━━━━━━━ 39/39 0:00:00
  Train prior forest for DE_baseline forest............... ━━━━━━━   3/3 0:14:04
  Iteratively add DE genes to DE_baseline forest.......... ━━━━━━━   3/3 0:46:53
  Train final baseline forest on all celltypes............ ━━━━━━━   3/3 0:14:41
Train final forests....................................... ━━━━━━━   3/3 1:26:01
  Train forest on pre/prior/pca selected genes............ ━━━━━━━   3/3 0:14:59
  Iteratively add genes from DE_baseline_forest........... ━━━━━━━ 12/12 0:56:25
  Train final forest on all celltypes..................... ━━━━━━━   3/3 0:14:36
Compile probeset list..................................... ━━━━━━━  100% 0:00:00
FINISHED  
          

Export probes for simulations

# Write the rows of the probeset table flagged by `selection` to CSV for the simulations.
highexpression_table = selector_highexpression.probeset
highexpression_table[highexpression_table.selection].to_csv('SPAPROS-highexpression.csv')

Quick summary of selected probes

What is the distribution of chosen probes among methods?

# Draw spapros' gene-overlap figure for the selector_highexpression run
# (presumably the overlap of selected genes between selection methods — confirm against the spapros docs).
selector_highexpression.plot_gene_overlap()
/home/vincent/.local/lib/python3.9/site-packages/spapros/plotting/plot.py:1241: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout()

# Names (index labels) of the genes whose `selection` flag is set
is_selected = selector_highexpression.probeset.selection
selector_highexpression.probeset.index[is_selected]
Index(['TPSB2', 'CCL21', 'GRP', 'EMP2', 'HYDIN', 'MIR205HG', 'TPM2', 'ZNF385D',
       'FCN3', 'S100B',
       ...
       'CALD1', 'NCALD', 'SLC38A1', 'KRT7', 'LRRIQ1', 'PTPRB', 'ADGRL2',
       'MYL9', 'IL2RA', 'IL1RL1'],
      dtype='object', length=124)

Table of characteristics for selected probes

# Keep only the rows of the probeset table whose `selection` flag is set,
# then echo the resulting table.
probeset_table = selector_highexpression.probeset
probe_candidates = probeset_table[probeset_table.selection]
probe_candidates
        gene_nr  selection  ...  required_marker  required_list_marker
TPSB2         1       True  ...             True                 False
CCL21         2       True  ...             True                 False
GRP           3       True  ...             True                 False
EMP2          4       True  ...             True                 False
HYDIN         5       True  ...             True                 False
...         ...        ...  ...              ...                   ...
PTPRB       120       True  ...             True                 False
ADGRL2      121       True  ...             True                 False
MYL9        122       True  ...             True                 False
IL2RA       123       True  ...             True                 False
IL1RL1      124       True  ...             True                 False

[124 rows x 17 columns]

Dotplots of probeset split by selection method

Cluster markers (DE 1vs all)

# Selected genes with a non-empty 'celltypes_DE_1vsall' entry, ordered by the 'celltypes_DE' column
has_1vsall_entry = probe_candidates['celltypes_DE_1vsall'] != ''
probes = probe_candidates[has_1vsall_entry].sort_values('celltypes_DE')
# First 40 genes only; the rest go into a second dotplot
first_genes = probes.index[:40]
sc.pl.dotplot(adata, first_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Everything after the first 40 genes of the current `probes` table
remaining_genes = probes.index[40:]
sc.pl.dotplot(adata, remaining_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

Nuanced cluster markers (celltypes_DE_specific)

# Selected genes with a non-empty 'celltypes_DE_specific' entry, ordered by the 'celltypes_DE' column
has_specific_entry = probe_candidates['celltypes_DE_specific'] != ''
probes = probe_candidates[has_specific_entry].sort_values('celltypes_DE')
# First 15 genes only; the rest go into a second dotplot
first_genes = probes.index[:15]
sc.pl.dotplot(adata, first_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Everything after the first 15 genes of the current `probes` table
remaining_genes = probes.index[15:]
sc.pl.dotplot(adata, remaining_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_utils.py:1140: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`.
  fig = pl.figure(figsize=ax_or_figsize)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

PCA based (pca_selected)

# Selected genes whose 'pca_selected' flag is set, ordered by the 'celltypes_DE' column
is_pca_selected = probe_candidates['pca_selected']
probes = probe_candidates[is_pca_selected].sort_values('celltypes_DE')
# Genes 0-32; the following dotplots cover the rest
first_genes = probes.index[:33]
sc.pl.dotplot(adata, first_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)
# Genes 33-65 of the current `probes` table
middle_genes = probes.index[33:66]
sc.pl.dotplot(adata, middle_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

# Everything from gene 66 onward in the current `probes` table
remaining_genes = probes.index[66:]
sc.pl.dotplot(adata, remaining_genes, groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
  dot_ax.scatter(x, y, **kwds)

Probeset cross-correlation

How well are probes correlated with each other?

Probeset performance evaluation within SPAPROS

Set up evaluation.

# Evaluator over the same AnnData object used for selection.
# results_dir=None: presumably nothing is written to disk — confirm against the spapros docs.
evaluator = sp.ev.ProbesetEvaluator(
    adata,
    verbosity=2,
    results_dir=None,
)

Reference: trial lung panel

# Gene symbols of the trial Xenium lung panel, read from the 'Gene' column of the order sheet.
xenium_panel1_genes = pd.read_csv('Xenium_panel_order_08_10_23.csv')['Gene']
# Hand the evaluator a plain list of gene names, matching the
# "SPAPROS high expression minimal" call, rather than a pandas Series.
evaluator.evaluate_probeset(xenium_panel1_genes.tolist(), set_id="Xenium Lung Trial")

SPAPROS basic probeset

# Evaluate the unconstrained selection. The gene names are passed as a plain list,
# matching the "SPAPROS high expression minimal" call, rather than a pandas Index.
evaluator.evaluate_probeset(
    selector.probeset.index[selector.probeset.selection].tolist(),
    set_id="SPAPROS basic",
)

SPAPROS high expression minimal probeset

# Gene names flagged as selected by the selector_highexpression run, as a plain list
highexpression_genes = selector_highexpression.probeset.index[
    selector_highexpression.probeset.selection
].tolist()
evaluator.evaluate_probeset(highexpression_genes, set_id="SPAPROS high expression minimal")
/home/vincent/.local/lib/python3.9/site-packages/numpy/lib/function_base.py:2897: RuntimeWarning: invalid value encountered in divide
  c /= stddev[:, None]
/home/vincent/.local/lib/python3.9/site-packages/numpy/lib/function_base.py:2898: RuntimeWarning: invalid value encountered in divide
  c /= stddev[None, :]
The following cell types are not included in forest classifications since they 
have fewer than 40 cells: ['Ionocyte']
SPAPROS PROBESET EVALUATION:                                                    
Shared metric computations................................ ━━━━━━━   3/3 1:18:41
  Computing shared compuations for knn_overlap............ ━━━━━━━   6/6 0:55:43
  Computing shared compuations for gene_corr.............. ━━━━━━━  100% 0:22:52
Probeset specific pre computations........................ ━━━━━━━   3/3 0:57:38
  Computing pre compuations for knn_overlap............... ━━━━━━━   6/6 0:57:36
Final probeset specific computations...................... ━━━━━━━   3/3 3:51:33
  Computing final compuations for knn_overlap............. ━━━━━━━   6/6 0:00:21
  Computing final compuations for forest_clfs............. ━━━━━━━ 25/25 3:51:11
  Computing final compuations for gene_corr............... ━━━━━━━  100% 0:00:00
FINISHED  
          
# `set_ids` is a required argument of summary_statistics(); a bare call raises
# TypeError. Summarise the probeset registered with evaluate_probeset() under this id.
evaluator.summary_statistics(set_ids=["SPAPROS high expression minimal"])
Traceback (most recent call last):
  File "<string>", line 1, in <module>
TypeError: summary_statistics() missing 1 required positional argument: 'set_ids'
# Draw the evaluator's summary figure.
# NOTE(review): presumably relies on the summary table computed by summary_statistics() — confirm.
evaluator.plot_summary()

# vignette uses a different method, this is the one in the wrapper functions listing
# NOTE(review): this call raises ValueError ("no results are found") when the evaluator holds no
# marker-correlation results — presumably the marker_corr metric has to be evaluated first; confirm.
evaluator.plot_marker_corr()
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/vincent/.local/lib/python3.9/site-packages/spapros/evaluation/evaluation.py", line 1195, in plot_marker_corr
    raise ValueError("Can't plot marker correlations since no results are found.")
ValueError: Can't plot marker correlations since no results are found.
LS0tCnRpdGxlOiAiUHJvYmUgc2VsZWN0aW9uIGZvciBJUEYgbHVuZyB1c2luZyBTUEFQUk9TIgpvdXRwdXQ6CiAgZ2l0aHViX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQotLS0gCgojIEVudmlyb25tZW50CgojIyBsb2FkIFIgZW52aXJvbm1lbnQKYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoU2V1cmF0KQpgYGAKCiMjIExvYWQgSVBGIENlbGwgQXRsYXMgZGF0YQpTcGFwcm9zIGlzIGV4cGVjdGluZyBzY2FucHkgZm9ybWF0IGRhdGEgd2l0aCByYXcgY291bnRzLiBMb2FkIGJhc2VkIG9uIGVzdGFibGlzaGVkIFIgd29ya2Zsb3cgdGhlbiBjb252ZXJ0LgojIyMgUmVhZCBpbiBHU0UxMzY4MzEKYGBge3J9Cm9yaWdpbmFsLmRpciA8LSAoJ34vdmNhbGFiLWZpbGVzL2RhdGFzZXRzL0dTRTEzNjgzMV9JUEZfQ2VsbF9BdGxhcy8nKQptZXRhZGF0YSA8LSByZWFkX3RzdihwYXN0ZTAob3JpZ2luYWwuZGlyLCdHU0UxMzY4MzFfQWxsQ2VsbHMuU2FtcGxlcy5DZWxsVHlwZS5NZXRhZGF0YVRhYmxlLnR4dC5neicpKQptZXRhZGF0YSA8LSBhcy5kYXRhLmZyYW1lKG1ldGFkYXRhKQpyb3duYW1lcyhtZXRhZGF0YSkgPC0gbWV0YWRhdGEkQ2VsbEJhcmNvZGVfSWRlbnRpdHkKCmZlYXR1cmVzIDwtIHJlYWRfdHN2KHBhc3RlMChvcmlnaW5hbC5kaXIsJ0dTRTEzNjgzMV9BbGxDZWxscy5HZW5lSURzLnR4dC5neicpKQpjZWxscyA8LSByZWFkX3RzdihwYXN0ZTAob3JpZ2luYWwuZGlyLCdHU0UxMzY4MzFfQWxsQ2VsbHMuY2VsbEJhcmNvZGVzLnR4dC5neicpLCBjb2xfbmFtZXMgPSBGKQoKY291bnRzIDwtIE1hdHJpeDo6cmVhZE1NKHBhc3RlMChvcmlnaW5hbC5kaXIsJ0dTRTEzNjgzMV9SYXdDb3VudHNfU3BhcnNlLm10eC5neicpKQpyb3duYW1lcyhjb3VudHMpIDwtIGZlYXR1cmVzJEhHTkNfRW5zZW1ibEFsdF9HZW5lSUQKY29sbmFtZXMoY291bnRzKSA8LSBjZWxscyRYMQpgYGAKYGBge3J9CmlwZmF0bGFzLmNlbGxzIDwtIENyZWF0ZVNldXJhdE9iamVjdChjb3VudHM9Y291bnRzLCBtZXRhLmRhdGE9bWV0YWRhdGEpCmBgYApgYGB7cn0Kcm0oY291bnRzLGNlbGxzLGZlYXR1cmVzLG1ldGFkYXRhKQpnYygpCmBgYAojIyMgU3RhbmRhcmQgZmlsdGVycwpgYGB7cn0KaXBmYXRsYXMuY2VsbHNbWydwZXJjZW50Lm10J11dIDwtIFBlcmNlbnRhZ2VGZWF0dXJlU2V0KGlwZmF0bGFzLmNlbGxzLCBwYXR0ZXJuPSdeTVQtJykKVmxuUGxvdChpcGZhdGxhcy5jZWxscywgZmVhdHVyZXMgPSBjKCJuRmVhdHVyZV9STkEiLCAibkNvdW50X1JOQSIsICJwZXJjZW50Lm10IiksIG5jb2wgPSAzLCBwdC5zaXplID0gMCkKYGBgCmBgYHtyfQppcGZhdGxhcy5jZWxscyA8LSBzdWJzZXQoaXBmYXRsYXMuY2VsbHMsIHN1YnNldCA9IG5GZWF0dXJlX1JOQSA+IDIwMCAmIG5GZWF0dXJlX1JOQSA8IDUwMDAgJiBwZXJjZW50Lm10IDwgMTApCmBgYAojIyMg
Q29udmVydCB0byBzY2FucHkgY29tcGF0aWJsZSBmb3JtYXQgKGFubmRhdGEpClRoaXMgaXMgcmVtYXJrYWJseSBub3QgYSBzb2x2ZWQgcHJvYmxlbSBidXQgc2NlYXN5IGhhcyBmdW5jdGlvbnMgZm9yIGl0LiBoYXZlIHRvIGJhY2stY29udmVydCB0aGUgU2V1cmF0NSBvYmplY3QgdG8gYSB2My1saWtlIGFzc2F5LgpgYGB7cn0KaXBmYXRsYXMuY2VsbHNbWydSTkEnXV0gPC0gYXMoaXBmYXRsYXMuY2VsbHNbWydSTkEnXV0sJ0Fzc2F5JykKc2NlYXN5Ojpjb252ZXJ0Rm9ybWF0KGlwZmF0bGFzLmNlbGxzLCBmcm9tPSdzZXVyYXQnLHRvPSdhbm5kYXRhJywgb3V0RmlsZSA9ICdpcGZhdGxhcy5hZGF0YScpCmBgYAoKCgojIyBsb2FkIHB5dGhvbiBlbnZpcm9ubWVudApXZWlyZCBiZWhhdmlvciB3aGVuIHJldGljdWxhdGUgaW5zdGFudGlhdGVkIGJ5IG90aGVyIHBhY2thZ2VzLCB0aGVyZWZvcmUgcmVzdGFydCBSLgpgYGB7cn0KLnJzLnJlc3RhcnRSKCkKYGBgClVzaW5nIHRoZSBTUEFQUk9TIHdvcmtmbG93IGRvY3VtZW50ZWQgYnkgdGhlaXMgbGFiIHdoaWNoIGlzIHB5dGhvbi9zY2FucHkgYmFzZWQuCmBgYHtweXRob259CmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IHNjYW5weSBhcyBzYwppbXBvcnQgc3BhcHJvcyBhcyBzcApzYy5zZXR0aW5ncy52ZXJib3NpdHk9MApzYy5sb2dnaW5nLnByaW50X2hlYWRlcigpCnByaW50KGYic3BhcHJvcz09e3NwLl9fdmVyc2lvbl9ffSIpCmBgYApBY2Nlc3MgYWRhdGEgb2JqZWN0IG9uIHRoZSBweXRob24gc2lkZS4gdG8gYXZvaWQgbmFtZSBjb252ZXJzaW9ucyB0aHJvdWdob3V0IHB5dGhvbiBjb2RlLCBqdXN0IG1ha2UgaXQgImFkYXRhLiIgVW5wcmVkaWN0YWJsZSBiZWhhdmlvciB3aGVuIHBvcnRpbmcgZnJvbSBSIHRvIHB5dGhvbiBlbnZpcm9ubWVudCwgc28gc3RvcmUgYXMgaDVhZCBhbmQgcmVhZCBiYWNrIGluLgpgYGB7cHl0aG9ufQphZGF0YSA9IHNjLnJlYWRfaDVhZCgnaXBmYXRsYXMuYWRhdGEnKQphZGF0YQpgYGAKIyBTUEFQUk9TIHByb2JlIHNlbGVjdGlvbgojIyBQcmVwcm9jZXNzIGRhdGEgd2l0aCBzY2FucHkKRmlsdGVyaW5nIGhhcyBhbHJlYWR5IGJlZW4gZG9uZSBvbiB0aGUgU2V1cmF0IHNpZGUuCmBgYHtweXRob259CnNjLnBwLm5vcm1hbGl6ZV90b3RhbChhZGF0YSwgdGFyZ2V0X3N1bT0xMDAwMCkgIyAxMGsgdGFyZ2V0IG1hdGNoZXMgZGVmYXVsdCBTZXVyYXQgc2NhbGUgZmFjdG9yCnNjLnBwLmxvZzFwKGFkYXRhKQpzYy5wcC5oaWdobHlfdmFyaWFibGVfZ2VuZXMoYWRhdGEsZmxhdm9yPSJjZWxsX3JhbmdlciIsbl90b3BfZ2VuZXM9MTAwMCkKc2MucHAucGNhKGFkYXRhLCBzdmRfc29sdmVyPSdsb2JwY2cnKSAjIGhhZCBpc3N1ZXMgd2l0aCBhcnBhY2sKc2MucHAubmVpZ2hib3JzKGFkYXRhLCBuX25laWdoYm9ycz0xMCwgbl9wY3M9MzApCnNjLnRsLnVtYXAoYWRhdGEpCnNjLnBsLnVtYXAoYWRhdGEsIGNvbG9yPVsnQ2VsbFR5cGVfQ2F0ZWdvcnknXSkKc2MucGwudW1h
cChhZGF0YSwgY29sb3I9WydEaXNlYXNlX0lkZW50aXR5J10pCmBgYAojIyBTUEFQUk9TIGJhc2ljOiBubyBjb25zdHJhaW50cy4KQXNrZWQgZm9yIDIwMCBnZW5lcywgYmVjYXVzZSBvbmUgaW1hZ2luZXMgdGhhdCB2YXJpb3VzIHN0YWtlaG9sZGVycyB3aWxsIGFkZCBhbm90aGVyIDEwMCBmb3IgSVBGIHN1YnNldHMsIGFuZCBwcm9iYWJseSBhc3RobWEgc3Rha2Vob2xkZXJzIHdpbGwgYWRkIGFub3RoZXIgMTAwIGJyaW5naW5nIHRvdGFsIHRvIDQwMC4gTm90ZSB0aGF0IHRoaXMgZGF0YXNldCBpbmNsdWRlcyBub3JtYWwgbHVuZyBhbmQgQ09QRCBsdW5nLiAKYGBge3B5dGhvbn0Kc2VsZWN0b3IgPSBzcC5zZS5Qcm9iZXNldFNlbGVjdG9yKGFkYXRhLCBuPTIwMCwgY2VsbHR5cGVfa2V5PSJNYW51c2NyaXB0X0lkZW50aXR5IiwgdmVyYm9zaXR5PTAsIHNhdmVfZGlyPU5vbmUpCnNlbGVjdG9yLnNlbGVjdF9wcm9iZXNldCgpCmBgYAojIyMgRXhwb3J0IHByb2JlcyBmb3Igc2ltdWxhdGlvbnMKYGBge3B5dGhvbn0Kc2VsZWN0b3IucHJvYmVzZXRbc2VsZWN0b3IucHJvYmVzZXQuc2VsZWN0aW9uXS50b19jc3YoJ1NQQVBST1MtYmFzaWMyMDAuY3N2JykKYGBgCgojIyMgUXVpY2sgc3VtbWFyeSBvZiBzZWxlY3RlZCBwcm9iZXMKV2hhdCBpcyB0aGUgZGlzdHJpYnV0aW9uIG9mIGNob3NlbiBwcm9iZXMgYW1vbmcgbWV0aG9kcz8KYGBge3B5dGhvbn0Kc2VsZWN0b3IucGxvdF9nZW5lX292ZXJsYXAoKQpgYGAKSSBkb24ndCB1bmRlcnN0YW5kIHdoYXQgdGhlIDR0aCBjb2x1bW4gaXMsIDMzIGdlbmVzIHRoYXQgaGF2ZSBubyBhdHRyaWJ1dGlvbiBmb3IgdGhlIG1ldGhvZCBvZiBzZWxlY3Rpb24/IApgYGB7cHl0aG9ufQojIGxpc3QgcHJvYmVzCnNlbGVjdG9yLnByb2Jlc2V0LmluZGV4W3NlbGVjdG9yLnByb2Jlc2V0LnNlbGVjdGlvbl0KYGBgClRhYmxlIG9mIGNoYXJhY3RlcmlzdGljcyBmb3Igc2VsZWN0ZWQgcHJvYmVzCmBgYHtweXRob259CnByb2JlX2NhbmRpZGF0ZXMgPSBzZWxlY3Rvci5wcm9iZXNldFtzZWxlY3Rvci5wcm9iZXNldC5zZWxlY3Rpb25dCnByb2JlX2NhbmRpZGF0ZXMKYGBgCiMjIyBEb3RwbG90cyBvZiBwcm9iZXNldCBzcGxpdCBieSBzZWxlY3Rpb24gbWV0aG9kCiMjIyMgQ2x1c3RlciBtYXJrZXJzIChERSAxdnMgYWxsKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfMXZzYWxsJ10hPScnKV0KcHJvYmVzID0gcHJvYmVzLnNvcnRfdmFsdWVzKCdjZWxsdHlwZXNfREUnKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbMDo0MF0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbNDA6Ol0sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpgYGAKIyMjIyBOdWFuY2VkIGNsdXN0ZXIgbWFya2Vy
cyAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydjZWxsdHlwZXNfREVfc3BlY2lmaWMnXSE9JycpXQpwcm9iZXMgPSBwcm9iZXMuc29ydF92YWx1ZXMoJ2NlbGx0eXBlc19ERScpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFswOjI1XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFsyNTo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyMjIFBDQSBiYXNlZCAoY2VsbHR5cGVzX0RFX3NwZWNpZmljKQpgYGB7cHl0aG9ufQpwcm9iZXMgPSBwcm9iZV9jYW5kaWRhdGVzWyhwcm9iZV9jYW5kaWRhdGVzWydwY2Ffc2VsZWN0ZWQnXSldCnByb2JlcyA9IHByb2Jlcy5zb3J0X3ZhbHVlcygnY2VsbHR5cGVzX0RFJykKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzA6MzNdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzMzOjY2XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFs2Njo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyBTUEFQUk9TIHdpdGggZXhwcmVzc2lvbiBwZW5hbHRpZXMKUGVuYWx0aWVzIGFyZSAwIHRvIDEsIHdoZXJlIDAgaXMgYmFkIGFuZCAxIGlzIGJlc3QgKGkuZS4sIG5vIHBlbmFsdHkpLiBCZWhhdmlvciBvZiBwZW5hbHRpZXMgc2VlbXMgdG8gZGVwZW5kIG9uIHRoZSB0aGUgc291cmNlLiBJZiBpZGVudGlmaWVkIGJ5IHRoZSBwYWNrYWdlLCBpdCdzIGEgbXVsdGlwbGllci4gSWYgdGhlIHNvdXJjZSBpcyBmcm9tIGEgbWFudWFsbHkgY3VyYXRlZCBsaXN0LCBhbGwgZ2VuZXMgd2l0aCAqYW55KiBwZW5hbHR5IGlzIHJlbW92ZWQuCgpXZSB3ZXJlIHRvbGQgYnkgMTB4IHRoYXQgU0NHQjFBMSBhbmQgU0ZUUEMgaGFkIHRvbyBoaWdoIGFidW5kYW5jZSB0byBkZXNpZ24gcHJvYmVzLiBDT0wxQTEgaXMgdGhlIGhpZ2hlc3Qgc2luZ2xlIGRldGVjdGVkIGdlbmUgaW4gdHJpYWwgcnVuczsgSSBhbSBub3QgYXdhcmUgb2YgYW55IGNvZGV3b3JkIGJ1ZGdldCBpc3N1ZXMgd2l0aCBDT0wxQTEuCgpgYGB7cHl0aG9ufQpzYy5wbC52aW9saW4oYWRhdGEsIFsnU0NHQjFBMScsICdTRlRQQycsICdDT0wxQTEnXSkKYGBgCmBgYHtweXRob259CnNjLnBsLnZpb2xpbihhZGF0YSwgWydHREYxNScsICdDTERONCcsICdLUlQ4J10pCmBgYApJZiB3ZSB3YW50ZWQgdG8gYmUgZmFpcmx5IGFnZ3Jlc3NpdmUsIGEgdGhyZXNob2xkIG9mIGxpa2UgMSB0byA2IHdvdWxkIGdldCB1cyBnZW5lcyB0aGF0IGFy
ZSBsaWtlIENPTDFBMSBvciBldmVuIG1vcmUgaGlnaGx5IGV4cHJlc3NlZC4gU29tZSBoaWdoIHBlcmZvcm1hbmNlIGdlbmVzIGxpa2UgS1JUOCB3b3VsZG4ndCBuZWNlc3NhcmlseSBiZSBzZWxlY3RlZC4KYGBge3B5dGhvbn0KIyBTZXQgdGhyZXNob2xkcwpsb3dlcl90aCA9IDEuMAp1cHBlcl90aCA9IDYKRkFDVE9SID0gMC4xCgojIENhbGN1bGF0ZSBxdWFudGlsZXMKc3AudXQuZ2V0X2V4cHJlc3Npb25fcXVhbnRpbGUoYWRhdGEsIHE9MC45OSwgbm9ybWFsaXNlPUZhbHNlLCBsb2cxcD1GYWxzZSwgemVyb3NfdG9fbmFuPUZhbHNlKQpzcC51dC5nZXRfZXhwcmVzc2lvbl9xdWFudGlsZShhZGF0YSwgcT0wLjksIG5vcm1hbGlzZT1GYWxzZSwgbG9nMXA9RmFsc2UsIHplcm9zX3RvX25hbj1UcnVlKQoKIyBHZXQgcGVuYWx0eSBmdW5jdGlvbnMgZm9yIGdpdmVuIGZhY3RvcgpwZW5hbHR5X2ZjdHM9e30KcGVuYWx0eV9mY3RzW2YibG93ZXJfe0ZBQ1RPUn0iXSA9IHNwLnV0LnBsYXRlYXVfcGVuYWx0eV9rZXJuZWwodmFyPTAuMSAqIEZBQ1RPUiwgeF9taW49bG93ZXJfdGgsIHhfbWF4PU5vbmUpCnBlbmFsdHlfZmN0c1tmInVwcGVyX3tGQUNUT1J9Il0gPSBzcC51dC5wbGF0ZWF1X3BlbmFsdHlfa2VybmVsKHZhcj0wLjUqRkFDVE9SLCB4X21pbj1Ob25lLCB4X21heD11cHBlcl90aCkKIyBDYWxjdWxhdGUgZWFjaCBnZW5lJ3MgcGVuYWx0eSB2YWx1ZQphZGF0YS52YXJbZiJleHByX3BlbmFsdHlfbG93ZXJfe0ZBQ1RPUn0iXSA9IHBlbmFsdHlfZmN0c1tmImxvd2VyX3tGQUNUT1J9Il0oYWRhdGEudmFyWydxdWFudGlsZV8wLjkgZXhwciA+IDAnXSkKYWRhdGEudmFyW2YiZXhwcl9wZW5hbHR5X3VwcGVyX3tGQUNUT1J9Il0gPSBwZW5hbHR5X2ZjdHNbZiJ1cHBlcl97RkFDVE9SfSJdKGFkYXRhLnZhclsncXVhbnRpbGVfMC45OSddKQojIFBDQSBhbmQgREUgc2VsZWN0aW9ucyB3aXRoIHBlbmFsdGllcwpwZW5hbHR5X2tleXMgPSBbZiJleHByX3BlbmFsdHlfbG93ZXJfe0ZBQ1RPUn0iLGYiZXhwcl9wZW5hbHR5X3VwcGVyX3tGQUNUT1J9Il0KCmFkYXRhLnZhclsiZXhwcl9wZW5hbHR5X2xvd2VyIl0gPSBhZGF0YS52YXJbZiJleHByX3BlbmFsdHlfbG93ZXJfe0ZBQ1RPUn0iXQphZGF0YS52YXJbImV4cHJfcGVuYWx0eV91cHBlciJdID0gYWRhdGEudmFyW2YiZXhwcl9wZW5hbHR5X3VwcGVyX3tGQUNUT1J9Il0KCmBgYApSdW4gdGhlIHNlbGVjdGlvbi4gSW4gdGhpcyBydW4gd2UgaGF2ZSBub3Qgc2VlZGVkIGFueSBzcGVjaWZpYyBudW1iZXIgb2YgZ2VuZXMsIGN1cmlvdXMgd2hhdCBpdCBjb21lcyB1cCB3aXRoLgpgYGB7cHl0aG9ufQojIGNyZWF0ZSBhbiBpbnN0YW5jZSBvZiB0aGUgUHJvYmVzZXRTZWxlY3RvciBjbGFzcwpzZWxlY3Rvcl9oaWdoZXhwcmVzc2lvbiA9IHNwLnNlLlByb2Jlc2V0U2VsZWN0b3IoCiAgICBhZGF0YSwKICAgIG49Tm9uZSwKICAgIGNlbGx0eXBlX2tleT0iTWFudXNjcmlwdF9JZGVudGl0eSIsCiAgICB2ZXJib3Np
dHk9MSwKICAgIHNhdmVfZGlyPU5vbmUsCiAgICBwY2FfcGVuYWx0aWVzPVsiZXhwcl9wZW5hbHR5X2xvd2VyIiwgImV4cHJfcGVuYWx0eV91cHBlciJdLAogICAgREVfcGVuYWx0aWVzPVsiZXhwcl9wZW5hbHR5X2xvd2VyIiwgImV4cHJfcGVuYWx0eV91cHBlciJdLAogICAgbV9wZW5hbHRpZXNfYWRhdGFfY2VsbHR5cGVzPVsiZXhwcl9wZW5hbHR5X2xvd2VyIiwgImV4cHJfcGVuYWx0eV91cHBlciJdLAogICAgbV9wZW5hbHRpZXNfbGlzdF9jZWxsdHlwZXM9WyJleHByX3BlbmFsdHlfdXBwZXIiXSwKKQoKYGBgCmBgYHtweXRob259CnNlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnNlbGVjdF9wcm9iZXNldCgpCmBgYAojIyMgRXhwb3J0IHByb2JlcyBmb3Igc2ltdWxhdGlvbnMKYGBge3B5dGhvbn0Kc2VsZWN0b3JfaGlnaGV4cHJlc3Npb24ucHJvYmVzZXRbc2VsZWN0b3JfaGlnaGV4cHJlc3Npb24ucHJvYmVzZXQuc2VsZWN0aW9uXS50b19jc3YoJ1NQQVBST1MtaGlnaGV4cHJlc3Npb24uY3N2JykKYGBgCgojIyMgUXVpY2sgc3VtbWFyeSBvZiBzZWxlY3RlZCBwcm9iZXMKV2hhdCBpcyB0aGUgZGlzdHJpYnV0aW9uIG9mIGNob3NlbiBwcm9iZXMgYW1vbmcgbWV0aG9kcz8KYGBge3B5dGhvbn0Kc2VsZWN0b3JfaGlnaGV4cHJlc3Npb24ucGxvdF9nZW5lX292ZXJsYXAoKQpgYGAKCmBgYHtweXRob259CiMgbGlzdCBwcm9iZXMKc2VsZWN0b3JfaGlnaGV4cHJlc3Npb24ucHJvYmVzZXQuaW5kZXhbc2VsZWN0b3JfaGlnaGV4cHJlc3Npb24ucHJvYmVzZXQuc2VsZWN0aW9uXQpgYGAKVGFibGUgb2YgY2hhcmFjdGVyaXN0aWNzIGZvciBzZWxlY3RlZCBwcm9iZXMKYGBge3B5dGhvbn0KcHJvYmVfY2FuZGlkYXRlcyA9IHNlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0W3NlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0LnNlbGVjdGlvbl0KcHJvYmVfY2FuZGlkYXRlcwpgYGAKIyMjIERvdHBsb3RzIG9mIHByb2Jlc2V0IHNwbGl0IGJ5IHNlbGVjdGlvbiBtZXRob2QKIyMjIyBDbHVzdGVyIG1hcmtlcnMgKERFIDF2cyBhbGwpCmBgYHtweXRob259CnByb2JlcyA9IHByb2JlX2NhbmRpZGF0ZXNbKHByb2JlX2NhbmRpZGF0ZXNbJ2NlbGx0eXBlc19ERV8xdnNhbGwnXSE9JycpXQpwcm9iZXMgPSBwcm9iZXMuc29ydF92YWx1ZXMoJ2NlbGx0eXBlc19ERScpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFswOjQwXSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCnNjLnBsLmRvdHBsb3QoYWRhdGEsIHByb2Jlcy5pbmRleFs0MDo6XSwgZ3JvdXBieT0nTWFudXNjcmlwdF9JZGVudGl0eScsIGRlbmRyb2dyYW09RmFsc2UpCmBgYAojIyMjIE51YW5jZWQgY2x1c3RlciBtYXJrZXJzIChjZWxsdHlwZXNfREVfc3BlY2lmaWMpCmBgYHtweXRob259CnByb2JlcyA9IHByb2JlX2NhbmRpZGF0ZXNbKHByb2JlX2NhbmRpZGF0ZXNbJ2NlbGx0eXBlc19ERV9zcGVjaWZpYyddIT0nJyldCnByb2Jl
cyA9IHByb2Jlcy5zb3J0X3ZhbHVlcygnY2VsbHR5cGVzX0RFJykKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzA6MTVdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzE1OjpdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKYGBgCiMjIyMgUENBIGJhc2VkIChjZWxsdHlwZXNfREVfc3BlY2lmaWMpCmBgYHtweXRob259CnByb2JlcyA9IHByb2JlX2NhbmRpZGF0ZXNbKHByb2JlX2NhbmRpZGF0ZXNbJ3BjYV9zZWxlY3RlZCddKV0KcHJvYmVzID0gcHJvYmVzLnNvcnRfdmFsdWVzKCdjZWxsdHlwZXNfREUnKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbMDozM10sIGdyb3VwYnk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBkZW5kcm9ncmFtPUZhbHNlKQpzYy5wbC5kb3RwbG90KGFkYXRhLCBwcm9iZXMuaW5kZXhbMzM6NjZdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKc2MucGwuZG90cGxvdChhZGF0YSwgcHJvYmVzLmluZGV4WzY2OjpdLCBncm91cGJ5PSdNYW51c2NyaXB0X0lkZW50aXR5JywgZGVuZHJvZ3JhbT1GYWxzZSkKYGBgCiMjIyMgUHJvYnNldCBjcm9zcy1jb3JyZWxhdGlvbgpIb3cgd2VsbCBhcmUgcHJvYmVzIGNvcnJlbGF0ZWQgd2l0aCBlYWNoIG90aGVyPwpgYGB7cHl0aG9ufQoKYGBgCgoKIyBQcm9iZXNldCBwZXJmb3JtYW5jZSBldmFsdWF0aW9uIHdpdGhpbiBTUEFQUk9TClNldCB1cCBldmFsdWF0aW9uLgpgYGB7cHl0aG9ufQojIEkgdGhpbmsgdGhlcmUgaXMgYXQgbGVhc3Qgb25lIGJ1ZyBpbiB0aGUgY29kZSB0aGF0IGV4cGVjdHMgImNlbGx0eXBlIiBhcyBhbiBlbGVtZW50LgphZGF0YS5vYnNbJ2NlbGx0eXBlJ109YWRhdGEub2JzWydNYW51c2NyaXB0X0lkZW50aXR5J10KCiMgaW5zdGFudGlhdGUgZXZhbHVhdG9yCmV2YWx1YXRvciA9IHNwLmV2LlByb2Jlc2V0RXZhbHVhdG9yKGFkYXRhLCBjZWxsdHlwZV9rZXk9J01hbnVzY3JpcHRfSWRlbnRpdHknLCBtZXRyaWNzPXsnY2x1c3Rlcl9zaW1pbGFyaXR5Jywna25uX292ZXJsYXAnLCdmb3Jlc3RfY2xmcycsJ21hcmtlcl9jb3JyJywnZ2VuZV9jb3JyJ30sIHZlcmJvc2l0eT0yLCByZXN1bHRzX2Rpcj1Ob25lKQpgYGAKCiMjIyMgUmVmZXJlbmNlOiB0cmlhbCBsdW5nIHBhbmVsCmBgYHtweXRob259Cnhlbml1bV9wYW5lbDFfZ2VuZXMgPSBwZC5yZWFkX2NzdignWGVuaXVtX3BhbmVsX29yZGVyXzA4XzEwXzIzLmNzdicpWydHZW5lJ10KYGBgCmBgYHtweXRob24sIGV2YWw9Rn0KZXZhbHVhdG9yLmV2YWx1YXRlX3Byb2Jlc2V0KHhlbml1bV9wYW5lbDFfZ2VuZXMsIHNldF9pZD0iWGVuaXVtIEx1bmcgVHJpYWwiKQpgYGAKIyMjIyBTUEFQUk9TIGJhc2ljIHByb2Jlc2V0CmBgYHtweXRob24sIGV2YWw9Rn0KZXZhbHVhdG9yLmV2YWx1YXRlX3Byb2Jl
c2V0KHNlbGVjdG9yLnByb2Jlc2V0LmluZGV4W3NlbGVjdG9yLnByb2Jlc2V0LnNlbGVjdGlvbl0sIHNldF9pZD0iU1BBUFJPUyBiYXNpYyIpCmBgYAojIyMjIFNQQVBST1MgaGlnaCBleHByZXNzaW9uIG1pbmltYWwgcHJvYmVzZXQKYGBge3B5dGhvbn0KZXZhbHVhdG9yLmV2YWx1YXRlX3Byb2Jlc2V0KHNlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0LmluZGV4W3NlbGVjdG9yX2hpZ2hleHByZXNzaW9uLnByb2Jlc2V0LnNlbGVjdGlvbl0udG9saXN0KCksIHNldF9pZD0iU1BBUFJPUyBoaWdoIGV4cHJlc3Npb24gbWluaW1hbCIpCmBgYApgYGB7cHl0aG9ufQpldmFsdWF0b3Iuc3VtbWFyeV9zdGF0aXN0aWNzKCkKZXZhbHVhdG9yLnBsb3Rfc3VtbWFyeSgpCmBgYAoKYGBge3B5dGhvbn0KIyB2aWduZXR0ZSB1c2VzIGEgZGlmZmVyZW50IG1ldGhvZCwgdGhpcyBpcyB0aGUgb25lIGluIHRoZSB3cmFwcGVyIGZ1bmN0aW9ucyBsaXN0aW5nCmV2YWx1YXRvci5wbG90X21hcmtlcl9jb3JyKCkKYGBgCgo=